library(tidyverse)
library(tidycensus)
library(sf)

library(gt)
library(mapview)

# Make sure to read the GEOID as a char!
# Run `get_towns_in_region.R` to get this table.
# We need this for a lot of our work, so we'll load it up early!
towns_in_MAPC <- read_csv(
  "data/processed/towns_in_MAPC_BRMPO.csv",
  col_types = list(col_character()))

Step 1: Fatality Count

We downloaded data from the Fatality Analysis Reporting System (FARS) FTP site for the years 2016 through 2020. We then filtered a specific table within those files (accident.csv) for those crashes that occurred in Massachusetts. We combined those files together for the entire state. We verified checked cities that didn’t appear didn’t have fatal crashes (Wenham, Hamilton, Norfolk, Rockport, and Essex), and modified several town names to make them match other datasets. We start from that simplified file. This operation was done in build_FARS_dataset.rmd).

acc_ma_16_20 <- read_csv("./data/processed/fatal_crashes_ma_2016_2020.csv", show_col_types = FALSE) |> 
  select(STATE, COUNTY, COUNTYNAME, CITY, CITYNAME, FATALS, fars_year, city_join)

We can group the data together by municipality and sum the data to find the 5-year statewide fatality totals. We’ll then need to pare it back to the MPO region.

# Find the fatalities by muni.
acc_bymuni <- acc_ma_16_20 |> 
  group_by(STATE, COUNTYNAME, city_join) |> 
  summarize(total_fatals_16_20 = sum(FATALS))

# Attach whether the community is in MAPC/BRMPO.
acc_MAPC <- acc_bymuni |> 
  full_join(towns_in_MAPC |>
              mutate(NAME_short = toupper(NAME_short)), 
            by = c("city_join" = "NAME_short")) |> 
  filter(!is.na(GEOID)) |> 
  mutate(total_fatals_16_20 = replace_na(total_fatals_16_20, 0))

# Summarize it for the whole region and calculate the rate.
acc_MAPC_summ <- acc_MAPC |> 
  group_by(RPA, MPO) |> 
  summarize(total_fatals_16_20 = sum(total_fatals_16_20)) |> 
  mutate(annual_fatal_rate = total_fatals_16_20 / 5)

# And report the results.
acc_MAPC_summ |>
  ungroup() |> 
  janitor::adorn_totals() |> 
  gt() |> 
  tab_header(title = "Fatal Crashes in MAPC Communities",
             subtitle = "as Reported in the 2016-2020 FARS") |> 
  fmt_number(columns = total_fatals_16_20, decimals = 0) |> 
  fmt_number(columns = annual_fatal_rate, decimals = 1) |> 

  cols_label(total_fatals_16_20 = "Total Fatalities (5 Years)") |> 
  cols_label(annual_fatal_rate = "Average Annual Fatalities") |> 

  tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. FARS = Fatality Analysis Reporting System") |> 
  tab_source_note(source_note = "Source: 2016-2020 FARS Accident Tables (FATALS)")
Fatal Crashes in MAPC Communities
as Reported in the 2016-2020 FARS
RPA MPO Total Fatalities (5 Years) Average Annual Fatalities
MAPC Boston 542 108.4
MAPC/OCPC Old Colony 15 3.0
Total - 557 111.4
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. FARS = Fatality Analysis Reporting System
Source: 2016-2020 FARS Accident Tables (FATALS)

Information from MassDOT’s Impact Portal suggest that 534 fatalities occurred in the MAPC region: Impact Portal Screenshot showing 534 fatalities between 2016 and 2020 in the MAPC region. This value is relatively close to the FARS-derived values.

Step 2: 5-Year Average Fatality Rate

Download 2020 US Census Data

We will use the census API to download population data from the 2020 US Census.

# vars <- tidycensus::load_variables(year = 2020, dataset = "pl")

# Download our towns from the census.
ma_towns <- get_decennial(
  geography = "county subdivision",
  variables = "P1_001N",
  year = 2020,
  state = "MA",
  geometry = TRUE,
  cb = FALSE
)

MAPC_towns <- ma_towns |> left_join(towns_in_MAPC, by = "GEOID") |> 
  filter(str_detect(RPA, "MAPC"))

Let’s make a quick map to check if that looks right.

MAPC_towns |> mapview(zcol = "RPA")

Summarize 2020 Census Data for the Region

pop_MAPC_summ <- MAPC_towns |> 
  st_drop_geometry() |> 
  group_by(RPA, MPO) |> 
  summarize(total_pop = sum(value)) |>
  ungroup()

pop_MAPC_summ |> 
  janitor::adorn_totals() |> 
  gt() |> 
  tab_header(title = "Population of MAPC and the Boston MPO",
             subtitle = "Based on the 2020 U.S. Census") |> 
  fmt_number(columns = total_pop, decimals = 0) |> 
  cols_label(total_pop = "Total Population") |> 
  tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council") |> 
  tab_source_note(source_note = "Source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)")
Population of MAPC and the Boston MPO
Based on the 2020 U.S. Census
RPA MPO Total Population
MAPC Boston 3,357,194
MAPC/OCPC Old Colony 78,565
Total - 3,435,759
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council
Source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)

Combine Fatalities and Population

Now we can merge the fatalities and the population to estimate the fatality rate.

fatal_rate <- left_join(acc_MAPC_summ,
                        pop_MAPC_summ,
                         by = c("RPA", "MPO")) |>
  janitor::adorn_totals() |> 
  mutate(
    fatal_per_100k = annual_fatal_rate/total_pop * 100000)

fatal_rate |>   gt() |> 
  tab_header(title = "Fatality Rate in MAPC Communities",
             subtitle = "Based on the 2020 U.S. Census and 2016-2020 FARS data") |> 
  fmt_number(columns = total_pop, decimals = 0) |> 
  fmt_number(columns = total_fatals_16_20, decimals = 0) |> 
  fmt_number(columns = annual_fatal_rate, decimals = 1) |> 
  fmt_number(columns = fatal_per_100k, decimals = 2) |> 

  cols_label(total_fatals_16_20 = "Total Fatalities (5 Years)") |> 
  cols_label(annual_fatal_rate = "Average Annual Fatalities") |> 
  cols_label(total_pop = "Total Population") |> 
  cols_label(fatal_per_100k = "5-Year Average Fatality Rate (per 100,000 persons)") |> 

  tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. FARS = Fatality Analysis Reporting System") |> 
  tab_source_note(source_note = "Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)") |> 
  tab_source_note(source_note = "Fatality source: 2016-2020 FARS Accident Tables (FATALS)")
Fatality Rate in MAPC Communities
Based on the 2020 U.S. Census and 2016-2020 FARS data
RPA MPO Total Fatalities (5 Years) Average Annual Fatalities Total Population 5-Year Average Fatality Rate (per 100,000 persons)
MAPC Boston 542 108.4 3,357,194 3.23
MAPC/OCPC Old Colony 15 3.0 78,565 3.82
Total - 557 111.4 3,435,759 3.24
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. FARS = Fatality Analysis Reporting System
Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)
Fatality source: 2016-2020 FARS Accident Tables (FATALS)

Step 3: Underserved Communities

Process Underserved Community Data

We can go to the Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool referenced in the NOFO to obtain underserved census tracts.

The data needs to be cleaned before it’s joined to population data. We start by using the 2010 Census geography as that matches the format of the downloaded data. We’ll identify the places that are DACs using the 2010 geography, then we’ll handle bringing that to 2020 when we have it a little more manageable. Census tracts split and merge over time as the population changes. The Census Bureau likes to keep tracts around 4,000 people.

underserved <- read_csv("./data/base/RAISE_Persistent_Poverty.csv") |>
  janitor::clean_names() |> 
  rename(state = a_state, 
         county = b_county, 
         tract_name = c_census_tract_name, 
         app_cnty  = e_app_county_meets_definition, 
         app_tract = f_app_census_tract_meets_definition, 
         hdc_tract = g_hdc_census_tract_meets_definition) |> 
  mutate(app_cnty  = recode(app_cnty , "No" = 0, "Yes" = 1, "Not Identified" = -1), 
         app_tract = recode(app_tract, "No" = 0, "Yes" = 1, "Not Identified" = -1), 
         hdc_tract = recode(hdc_tract, "No" = 0, "Yes" = 1, "Not Identified" = -1))
  
underserved_filter <- underserved |> 
  filter(state == "Massachusetts") |> 
  mutate(underserved = app_cnty == 1 | app_tract == 1 | hdc_tract == 1)

# The detailed census tract data comes with similar information attached.
# We can use that information to build the join. First we need to download the full data.
# It appears that the definition file used by USDOT for the DACs was based on the 2010 Census 
# geography. 
 
ma_tracts_10 <- get_decennial(
  geography = "tract",
  variables = "P001001",
  year = 2010,
  state = "MA",
  geometry = TRUE,
  cb = FALSE,
  keep_geo_vars = TRUE)

# Just pulling this into a seperate variable so we don't have 
# to go grab the data again if we need to fix things
ma_tracts_10_mod <- ma_tracts_10 |> 
  separate(NAME, into = c("tract_name", "county_name", "state_name"), sep = ",") |> 
  mutate(tract_name = trimws(tract_name),
         county_name = trimws(county_name)) |> 
  left_join(underserved_filter, 
            by = c("tract_name" = "tract_name",
                   "county_name" = "county")
            )

# Make the dataset a little smaller. This isn't strictly necessary.
ma_tracts_10_mod <- ma_tracts_10_mod |> 
  filter(county_name %in% c("Plymouth County", "Norfolk County", "Worcester County",
                         "Middlesex County", "Essex County",    "Suffolk County")) |> 
  filter(underserved == TRUE)

Find the Underserved Tracts in MAPC/Boston MPO

We clip the dataset to contain just those that are within the region’s town boundaries. I was having trouble getting the intersect feature to work as is–I suspect it has something to do with the borders of tracts being counted as an intersection. st_covered_by seems to work though. Without buffering it by 10m, this missed a couple of tracts. This method needs some work to make it more stable.

underserved_inMAPC <- st_join(ma_tracts_10_mod, 
                                      MAPC_towns |> group_by(RPA, MPO) |> summarize() |> st_buffer(10),
                                      join = st_covered_by) |> 
  filter(str_detect(RPA, "MAPC")) |> 
  st_transform(st_crs(ma_tracts_10_mod))

underserved_inMAPC |> 
  select(GEOID10, app_cnty, app_tract, hdc_tract,RPA, MPO) |> 
  st_transform(4326) |> 
  st_write("./data/processed/underserved_inMAPC_2010CensusTracts.shp",
           delete_dsn = TRUE)
## Deleting source `./data/processed/underserved_inMAPC_2010CensusTracts.shp' failed
## Writing layer `underserved_inMAPC_2010CensusTracts' to data source 
##   `./data/processed/underserved_inMAPC_2010CensusTracts.shp' using driver `ESRI Shapefile'
## Writing 121 features with 6 fields and geometry type Multi Polygon.
underserved_inMAPC |> 
  select(GEOID10, app_cnty, app_tract, hdc_tract,RPA, MPO) |>
  st_transform(4326) |> 
  st_write("./data/processed/underserved_inMAPC_2010CensusTracts.geojson",
           delete_dsn = TRUE)
## Deleting source `./data/processed/underserved_inMAPC_2010CensusTracts.geojson' failed
## Writing layer `underserved_inMAPC_2010CensusTracts' to data source 
##   `./data/processed/underserved_inMAPC_2010CensusTracts.geojson' using driver `GeoJSON'
## Writing 121 features with 6 fields and geometry type Multi Polygon.
mapview(ma_tracts_10_mod, col.regions = "black") +
  mapview(underserved_inMAPC, col.regions = "red") + 
  mapview(MAPC_towns, zcol = "MPO")
underserved_inMAPC |> 
  st_drop_geometry() |> 
  group_by(RPA, MPO) |> 
  summarize(DAC_total = sum(value)) |> 
  janitor::adorn_totals() |> 
  gt() |>
  fmt_number(columns = DAC_total, decimals = 0) |> 
  cols_label(DAC_total = "DAC Population") |> 
  tab_header(title = "Population Living in Underserved Census Tracts in Massachusetts (2010 Census)",
             subtitle = "Based on the USDOT's definition of disadvantaged communities ('DACs') from the Justice40 Initiative") |>
    tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community") |> 
  tab_source_note(source_note = "Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij") |> 
    tab_source_note(source_note = "Population source: 2010 US Census: SF1, P001001 (Total Population)")
Population Living in Underserved Census Tracts in Massachusetts (2010 Census)
Based on the USDOT's definition of disadvantaged communities ('DACs') from the Justice40 Initiative
RPA MPO DAC Population
MAPC Boston 490,109
MAPC/OCPC Old Colony 1,753
Total - 491,862
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community
Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij
Population source: 2010 US Census: SF1, P001001 (Total Population)

That’s useful to know, but it’s still the wrong year. We need to go figure out how to deal with the changes between 2010 and 2020.

ma_tracts_20 <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  year = 2020,
  state = "MA",
  geometry = TRUE,
  cb = FALSE,
  keep_geo_vars = TRUE)

mapview(ma_tracts_20, col.regions = "grey50") + mapview(underserved_inMAPC, col.regions = "red")

Hmm. So our options are to do some sort of allocation based on area or perhaps population. tidycensus::interpolate_pw() might have what we need. Or we can basically say, we’ll take the tracts that essentially match up with what they were trying to do with the initial dataset. We can say any tract that’s MOSTLY covered by one of the tracts will be included. Then we can take a peek and see if it looks good. There aren’t many non-1:1 relations, so this really has a very limited effect on the results.

# We'll start by selecting tracts that touch our tracts. It'll make things quicker.
ma_tracts_20_nearby <- st_intersection(ma_tracts_20, underserved_inMAPC) |> 
  st_set_geometry(NULL) |> 
  select(GEOID)  |>  
  distinct() |> 
  pull() 

# No idea why tidyverse isnt working here. But we want the area of each tract.
# We'll use this to figure out which 2020 tracts to pull in.
ma_tracts_20_near <- ma_tracts_20 |> 
  filter(GEOID %in% ma_tracts_20_nearby)

# Find the area of the full 2020 Census tracts. This will be our total area.
ma_tracts_20_near$area_tract <- st_area(ma_tracts_20_near)

mapview(ma_tracts_20_near) + mapview(underserved_inMAPC, col.regions = "red")
# Intersect the underserved areas from 2010. We're going to use this intersection
# to figure out the proportion of each new tract that's close to the underserved areas.
ma_tracts_20_filter <- st_intersection(ma_tracts_20_near, 
                                       underserved_inMAPC |> select(GEOID10 = GEOID)) |> 
  st_make_valid() # This seems important with some odd geometry that results from that.


ma_tracts_20_filter$area_int <- st_area(ma_tracts_20_filter)

# Let's select a cutoff and see how we do. We call it filter2 because it's a pain
# if we messed up and have to go do that expensive intersection a second time. 
# I'd rather use a little extra memory than wait around. 

ma_tracts_20_filter2 <- ma_tracts_20_filter |> 
  group_by(GEOID) |> 
  mutate(pct_area = units::drop_units(area_int / area_tract)) |> 
  ungroup() |> 
  st_make_valid() |> 
  filter(pct_area > 0.2) |> 
  select(GEOID) |> 
  st_drop_geometry() |> 
  distinct() |> # No need for duplicates here!
  pull()

Ok! Now all that’s left to do is see how that affects our 2020 Census selection. The left maps show the 2020 tracts and the right map show how that compares to the 2010 geography.

m1 <- mapview(ma_tracts_20, col.regions = "grey80", lwd = 1.5) + 
  mapview(ma_tracts_20 |> filter(GEOID %in% ma_tracts_20_filter2)) +
   mapview(underserved_inMAPC, col.regions = "red") 

m2 <- mapview(ma_tracts_10, col.regions = "grey80", lwd = 1.5) + 
  mapview(ma_tracts_20 |> filter(GEOID %in% ma_tracts_20_filter2)) +
   mapview(underserved_inMAPC, col.regions = "red") 

leafsync::sync(m1,m2)

That looks pretty good! We’re not including a sliver of Waltham, but we are including a little extra to the south. This seems to keep most of the idea behind the DACs.

Now we can add up the populations, summarize the information, and be done!

# This is just what we did for the 2010 tracts done for the 2020 tracts! 
underserved_inMAPC_2020 <- st_join(ma_tracts_20 |> filter(GEOID %in% ma_tracts_20_filter2), 
                                      MAPC_towns |> group_by(RPA, MPO) |> summarize() |> st_buffer(10),
                                      join = st_covered_by) |> 
  filter(str_detect(RPA, "MAPC"))

underserved_inMAPC_2020 |> select(GEOID, RPA, MPO) |> 
  st_transform(4326) |> 
  st_write("./data/processed/underserved_inMAPC_2020CensusTracts.shp",
                                                               delete_dsn = TRUE)
## Warning in CPL_write_ogr(obj, dsn, layer, driver,
## as.character(dataset_options), : GDAL Error 1: ./data/processed/
## underserved_inMAPC_2020CensusTracts.shp does not appear to be a file or
## directory.
## Deleting source `./data/processed/underserved_inMAPC_2020CensusTracts.shp' failed
## Writing layer `underserved_inMAPC_2020CensusTracts' to data source 
##   `./data/processed/underserved_inMAPC_2020CensusTracts.shp' using driver `ESRI Shapefile'
## Writing 136 features with 3 fields and geometry type Multi Polygon.
underserved_inMAPC_2020 |> select(GEOID, RPA, MPO) |>
  st_transform(4326) |> 
  st_write("./data/processed/underserved_inMAPC_2020CensusTracts.geojson",
                                                               delete_dsn = TRUE)
## Deleting source `./data/processed/underserved_inMAPC_2020CensusTracts.geojson' failed
## Writing layer `underserved_inMAPC_2020CensusTracts' to data source 
##   `./data/processed/underserved_inMAPC_2020CensusTracts.geojson' using driver `GeoJSON'
## Writing 136 features with 3 fields and geometry type Multi Polygon.
underserved_inMAPC_2020_summ <- underserved_inMAPC_2020 |> 
  st_drop_geometry() |> 
  group_by(RPA, MPO) |> 
  summarize(DAC_total = sum(value))

underserved_inMAPC_2020_summ |> 
  janitor::adorn_totals() |> 
  gt() |>
  fmt_number(columns = DAC_total, decimals = 0) |> 
  cols_label(DAC_total = "DAC Population") |> 
  tab_header(title = "Population Living in Underserved Census Tracts in Massachusetts (2020 Census)",
             subtitle = "Based on the USDOT's definition of disadvantaged communities ('DACs') from the Justice40 Initiative") |>
    tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community") |> 
  tab_source_note(source_note = "Note: DACs were based on 2010 tract geography, which does not always match 2020 tract geography. CTPS overlayed the 2020 tract geometry over the 2010 DACs in the region. CTPS allocated full tract populations for those tracts that were fuully or substantially covered by a 2010 DAC tract. If a sliver or small amount of a tract was covered, none of the population was included.") |> 
  tab_source_note(source_note = "Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij") |> 
    tab_source_note(source_note = "Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)")
Population Living in Underserved Census Tracts in Massachusetts (2020 Census)
Based on the USDOT's definition of disadvantaged communities ('DACs') from the Justice40 Initiative
RPA MPO DAC Population
MAPC Boston 540,288
MAPC/OCPC Old Colony 2,017
Total - 542,305
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community
Note: DACs were based on 2010 tract geography, which does not always match 2020 tract geography. CTPS overlayed the 2020 tract geometry over the 2010 DACs in the region. CTPS allocated full tract populations for those tracts that were fuully or substantially covered by a 2010 DAC tract. If a sliver or small amount of a tract was covered, none of the population was included.
Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij
Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)

Now it’s time to find the DAC percentages.

DAC_pct <- left_join(underserved_inMAPC_2020_summ,
                        pop_MAPC_summ,
                         by = c("RPA", "MPO")) |>
  janitor::adorn_totals() |> 
  mutate(
    pct_DAC = DAC_total/total_pop)

DAC_pct |>   gt() |>
  tab_header(title = "Population Living in Underserved Census Tracts in Massachusetts (2020 Census)",
             subtitle = "Based on the USDOT's definition of disadvantaged communities ('DACs') from the Justice40 Initiative") |>
  
  fmt_number(columns = DAC_total, decimals = 0) |>
  fmt_number(columns = total_pop, decimals = 0) |>
  fmt_percent(columns = pct_DAC, decimals = 1) |>
  
  cols_label(DAC_total = "DAC Population") |>
  cols_label(total_pop = "Total Population") |>
  cols_label(pct_DAC = "DAC Percentage") |>
  
  tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community") |>
  tab_source_note(source_note = "Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij") |>
  tab_source_note(source_note = "Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)")
Population Living in Underserved Census Tracts in Massachusetts (2020 Census)
Based on the USDOT's definition of disadvantaged communities ('DACs') from the Justice40 Initiative
RPA MPO DAC Population Total Population DAC Percentage
MAPC Boston 540,288 3,357,194 16.1%
MAPC/OCPC Old Colony 2,017 78,565 2.6%
Total - 542,305 3,435,759 15.8%
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community
Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij
Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)

Step 4: Put it all together

all_info <- left_join(fatal_rate, DAC_pct,
                      by = c("RPA", "MPO", "total_pop"))

# Write out the final results
all_info |> write_csv("./output/Fatals_DAC_MAPC.csv")

# Plot it.
all_info |>
  gt() |> 
    tab_header(title = "Information for SS4A Grant Application",
             subtitle = "Number of Fatalities, Average Per Capita Fatality Rate, and Percentage of Population Living in Underserved Areas") |>
  
  # Fatality Info
  fmt_number(columns = total_pop, decimals = 0) |> 
  fmt_number(columns = total_fatals_16_20, decimals = 0) |> 
  fmt_number(columns = annual_fatal_rate, decimals = 1) |> 
  fmt_number(columns = fatal_per_100k, decimals = 2) |> 
  
  cols_label(total_fatals_16_20 = "Total Fatalities (5 Years)") |> 
  cols_label(annual_fatal_rate = "Average Annual Fatalities") |> 
  cols_label(total_pop = "Total Population") |> 
  cols_label(fatal_per_100k = "5-Year Average Fatality Rate (per 100,000 persons)") |> 
  
  # DAC Info
  fmt_number(columns = DAC_total, decimals = 0) |>
  fmt_percent(columns = pct_DAC, decimals = 1) |>
  cols_label(DAC_total = "DAC Population") |>
  cols_label(total_pop = "Total Population") |>
  cols_label(pct_DAC = "DAC Percentage") |>
  
  tab_spanner(
    label = "Total Fatalities",
    columns = total_fatals_16_20
  ) |> 
  tab_spanner(
    label = "Average Fatality Rate",
    columns = c(annual_fatal_rate, total_pop, fatal_per_100k)
  ) |> 
  tab_spanner(
    label = "Pct. of Population in Underserved Communities",
    columns = c(DAC_total, pct_DAC)
  ) |> 
  
    tab_source_note(source_note = "Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community") |>
    tab_source_note(source_note = "Fatality source: 2016-2020 FARS Accident Tables (FATALS)") |> 
  tab_source_note(source_note = "Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij") |>
  tab_source_note(source_note = "Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)")
Information for SS4A Grant Application
Number of Fatalities, Average Per Capita Fatality Rate, and Percentage of Population Living in Underserved Areas
RPA MPO Total Fatalities Average Fatality Rate Pct. of Population in Underserved Communities
Total Fatalities (5 Years) Average Annual Fatalities Total Population 5-Year Average Fatality Rate (per 100,000 persons) DAC Population DAC Percentage
MAPC Boston 542 108.4 3,357,194 3.23 540,288 16.1%
MAPC/OCPC Old Colony 15 3.0 78,565 3.82 2,017 2.6%
Total - 557 111.4 3,435,759 3.24 542,305 15.8%
Note: RPA = Regional Planning Agency. MPO = Metropolitan Planning Organization. MAPC = Metropolitan Area Planning Council. OCPC = Old Colony Planning Council. DAC = Disadvantaged Community
Fatality source: 2016-2020 FARS Accident Tables (FATALS)
Classification source: Areas of Persistent Poverty Project (APP) and Historically Disadvantaged Community (HDC) Status Tool: https://datahub.transportation.gov/stories/s/tsyd-k6ij
Population source: 2020 U.S. Census: P.L. 94-171, Table: P1 (Total Population, P1_001N)